This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Print summary statistics for each column of the `cars` data set.
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.
# Read every CSV input from the data/ directory into its own data frame.
# Paths are relative, so the document must be knitted from the project root.
countries_aggregated <- read.csv("data/countries-aggregated.csv")
key_countries_pivoted <- read.csv("data/key-countries-pivoted.csv")
reference <- read.csv("data/reference.csv")
time_series_19_covid_combined <- read.csv("data/time-series-19-covid-combined.csv")
us_confirmed <- read.csv("data/us_confirmed.csv")
us_deaths <- read.csv("data/us_deaths.csv")
worldwide_aggregated <- read.csv("data/worldwide-aggregated.csv")
world_cities <- read.csv("data/worldcities.csv")
Statewide <- read.csv("data/Statewide (1).csv")
library(ggplot2)
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble 3.0.3 ✓ dplyr 1.0.0
## ✓ tidyr 1.1.0 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ✓ purrr 0.3.4
## ── Conflicts ──────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(leaflet)
library(sp)
library(flexdashboard)
library(broom)
library(modelr)
##
## Attaching package: 'modelr'
## The following object is masked from 'package:broom':
##
## bootstrap
# Aggregate the US confirmed-case table to one row per state/territory:
# a representative map position (mean Lat/Long) plus the summed `Case` column.
#
# Rows whose coordinates are exactly (0, 0) appear to be placeholders without a
# real location (e.g. the "Diamond Princess" entry sits at 0/0). Averaging them
# in drags a state's position toward the origin, so they are left out of the
# coordinate means only; their `Case` values still count toward `total_cases`.
# A state/territory with no located rows ends up with NaN coordinates rather
# than being pinned at (0, 0).
# NOTE(review): `Case` may be a cumulative per-date series; if so, summing it
# over all rows overstates the totals -- confirm against the data.
us_confirmed_updated <- us_confirmed %>%
  # Flag placeholders before summarising: inside summarize() `Lat` is replaced
  # by its own summary before `Long` is evaluated, so the test cannot be inline.
  mutate(no_location = Lat == 0 & Long == 0) %>%
  group_by(Province.State) %>%
  summarize(
    Lat = mean(Lat[!no_location]),
    Long = mean(Long[!no_location]),
    total_cases = sum(Case)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
us_confirmed_updated
## # A tibble: 58 x 4
## Province.State Lat Long total_cases
## <chr> <dbl> <dbl> <int>
## 1 Alabama 31.9 -84.2 2024563
## 2 Alaska 56.4 -138. 60536
## 3 American Samoa -14.3 -170. 0
## 4 Arizona 29.7 -98.3 3474676
## 5 Arkansas 34.0 -90.0 993778
## 6 California 36.6 -117. 12656799
## 7 Colorado 37.8 -102. 2361609
## 8 Connecticut 33.3 -58.1 3659874
## 9 Delaware 23.5 -45.3 797079
## 10 Diamond Princess 0 0 5923
## # … with 48 more rows
# Interactive map with one marker per state/territory, placed at its aggregated
# Lat/Long. Clicking a marker opens a popup with the state name and case total.
us_confirmed_updated %>%
  leaflet() %>%
  addProviderTiles("OpenStreetMap.Mapnik") %>%
  addMarkers(
    lng = ~Long,
    lat = ~Lat,
    # Popup content is HTML: a heading with the state name (properly opened and
    # closed), followed by the case count.
    popup = ~paste0(
      "<h3>State: ", Province.State, "</h3>",
      "Cases: ", total_cases
    )
  )
# Interactive bar chart of the summed `Case` values per state/territory.
ggplotly(
  ggplot(us_confirmed_updated) +
    geom_col(mapping = aes(x = Province.State, y = total_cases)) +
    # Turn the state names vertical so the x-axis labels do not overlap,
    # matching the other per-state bar charts in this document.
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
)
# Interactive bar chart of `Effective.Date` by `State` from the Statewide table,
# with the x-axis labels turned vertical.
# NOTE(review): a date-like column on the bar-height axis is unusual -- confirm
# this is the intended encoding.
ggplotly(
  ggplot(Statewide) +
    geom_col(aes(x = State, y = Effective.Date)) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
)
# Total population per state/territory from the reference table.
# drop_na() discards every row that has an NA in any column before summing.
# NOTE(review): the resulting totals look roughly double the real state
# populations (e.g. California comes out near 79M), which would be consistent
# with state-level rows being summed together with their sub-regions -- confirm
# against the reference table's schema.
reference_updated <- reference %>%
  drop_na() %>%
  group_by(Province.State) %>%
  summarise(total_pop = sum(Population))
## `summarise()` ungrouping output (override with `.groups` argument)
# Interactive bar chart of total population per state/territory, with the
# x-axis labels turned vertical.
ggplotly(
  ggplot(reference_updated) +
    geom_col(aes(x = Province.State, y = total_pop)) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
)
# Summed `Case` values per state/territory only (no coordinates); this is the
# table that gets joined with the population totals.
us_confirmed_updated1 <- us_confirmed %>%
  group_by(Province.State) %>%
  summarise(total_cases = sum(Case))
## `summarise()` ungrouping output (override with `.groups` argument)
library(dplyr)
# Keep only the states/territories present in both tables and put their
# population and case totals side by side, then print the result.
us_confirmed_pop <- inner_join(
  reference_updated,
  us_confirmed_updated1,
  by = "Province.State"
)
us_confirmed_pop
## # A tibble: 56 x 3
## Province.State total_pop total_cases
## <chr> <int> <int>
## 1 Alabama 9806370 2024563
## 2 Alaska 1463090 60536
## 3 American Samoa 55641 0
## 4 Arizona 14557434 3474676
## 5 Arkansas 6035608 993778
## 6 California 79024446 12656799
## 7 Colorado 11517472 2361609
## 8 Connecticut 7130574 3659874
## 9 Delaware 1947528 797079
## 10 District of Columbia 1411498 740559
## # … with 46 more rows
# Simple linear regression with total population as the response and total
# cases as the single predictor, followed by a one-row summary of the fit.
confirmed_pop_model <- lm(total_pop ~ total_cases, data = us_confirmed_pop)
glance(confirmed_pop_model)
## # A tibble: 1 x 12
## r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.487 0.477 1.06e7 51.2 2.29e-9 1 -985. 1975. 1981.
## # … with 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
# Attach the model's fitted values (`pred`) and residuals (`resid`) to the
# joined table, then print it.
confirmed_pop_df <- us_confirmed_pop %>%
  add_predictions(confirmed_pop_model, var = "pred") %>%
  add_residuals(confirmed_pop_model, var = "resid")
confirmed_pop_df
## # A tibble: 56 x 5
## Province.State total_pop total_cases pred resid
## <chr> <int> <int> <dbl> <dbl>
## 1 Alabama 9806370 2024563 9733350. 73020.
## 2 Alaska 1463090 60536 5988150. -4525060.
## 3 American Samoa 55641 0 5872714. -5817073.
## 4 Arizona 14557434 3474676 12498568. 2058866.
## 5 Arkansas 6035608 993778 7767748. -1732140.
## 6 California 79024446 12656799 30007940. 49016506.
## 7 Colorado 11517472 2361609 10376062. 1141410.
## 8 Connecticut 7130574 3659874 12851721. -5721147.
## 9 Delaware 1947528 797079 7392663. -5445135.
## 10 District of Columbia 1411498 740559 7284885. -5873387.
## # … with 46 more rows
# Predicted vs. observed values. The model's response is `total_pop`, so the
# fitted values (`pred`) are compared against `total_pop`; a point on the red
# y = x line is one the model predicts exactly.
ggplot(confirmed_pop_df) +
  geom_point(mapping = aes(x = pred, y = total_pop)) +
  geom_abline(slope = 1, intercept = 0, color = "red")
# Observed data with the fitted regression line drawn on top. The model's
# slope and intercept describe total_pop as a function of total_cases, so the
# points are plotted on those same axes (predictor on x, response on y).
ggplot(confirmed_pop_df) +
  geom_point(mapping = aes(x = total_cases, y = total_pop)) +
  geom_abline(
    slope = coef(confirmed_pop_model)[["total_cases"]],
    intercept = coef(confirmed_pop_model)[["(Intercept)"]],
    color = "red"
  )
# Distribution of the model residuals, drawn as a 30-bin histogram.
ggplot(confirmed_pop_df) +
  geom_histogram(aes(x = resid), bins = 30)